set.seed(31337)

rm(list = ls())

library(BayesTree)
library(sem)
library(foreign)
library(sampling)
library(xtable)

alpha <- 0.275

setwd("~/Dropbox/complieropt/Polmeth Archive")
source("icsw.R")

ggn <- read.dta("greengerbernickerson.dta")

attach(ggn)

Y <- ggn$voted01
D <- ggn$contact
Z <- ggn$treatmen

N <- length(D)

PrT <- ggn$prob

colnames(ggn)

white <- race == "W"
black <- race == "B"

age[age>101] <- mean(age[age <= 101],na.rm=TRUE)
age[is.na(age)] <- mean(age[age <= 101],na.rm=TRUE)
ageM <- rep(0,N)
ageM[age==mean(age[age <= 101],na.rm=TRUE)] <- 1
primary[is.na(primary)] <- mean(primary,na.rm=TRUE)
voted99a <- voted99
voted99a[is.na(voted99)] <-  mean(voted99,na.rm=TRUE)

dem <- party == "D" | party == "DEM" | party == "LIB"
rep <- party == "R" | party == "REP"
ind <- party == "I"

covs <- cbind(makeind(data.frame(factor(city))),famsize,white,black,voted00,voted99a,primary,age,ageM,dem,rep,ind)[,2:17]

w <- 1/PrT
w[Z == 0] <- 1/(1-PrT[Z==0])
w <- w/mean(w)

PrS <- PrT
PrS[Z==0] <- 1- PrS[Z==0]

cov.wt(cbind(Y,Z),wt=w)$cov[2,1]/cov.wt(cbind(D,Z),wt=w)$cov[2,1]

summary(lm(Y~Z,weights=w))

bssource <- data.frame(c(1:N))

dataf <- data.frame(Y,D,Z,w,covs)

detach(ggn)

attach(dataf)

glamfit <- glm(D~covs,weights=w,family=binomial(link="probit"),data=dataf,subset=Z==1)
summary(glamfit)

cscore <- predict.glm(glamfit,type="response",newdata=dataf)
summary(cscore)
sum(cscore < 1/N^alpha)
qcscore <- quantile(cscore, 1/N^alpha)
cscore[cscore < qcscore] <- qcscore


cov.wt(cbind(Y,Z),wt=w)$cov[2,1]/cov.wt(cbind(D,Z),wt=w)$cov[2,1]
cov.wt(cbind(Y,Z),wt=w/cscore)$cov[2,1]/cov.wt(cbind(D,Z),wt=w/cscore)$cov[2,1]


tsls.wfit(cbind(1,D,covs),Y,cbind(1,Z,covs),w)$coefficients

tsls.wfit(cbind(1,D,covs),Y,cbind(1,Z,covs),w/cscore)$coefficients

detach(dataf)

numiter <- 2500

test <- aest <- matrix(NA,nrow=numiter,ncol=18)

bsclu <- (as.numeric(factor(ggn$cityturf)))#+1000*Z)

for(i in 1:numiter) {

draw <- unlist(sapply(sort(unique(bsclu)), function(x) if(sum(bsclu==x) != 1) sample(c(1:N)[bsclu == x],replace=TRUE) else c(1:N)[bsclu==x]))

datafBS <- dataf[draw,]

glamfitBS <- glm(datafBS$D~covs[draw,],weights=datafBS$w,family=binomial(link="probit"),data=datafBS,subset=datafBS$Z==1)

cscoreBS <- predict.glm(glamfitBS,type="response",newdata=datafBS)

qcscoreBS <- quantile(cscoreBS, 1/N^alpha)
cscoreBS[cscoreBS < qcscoreBS] <- qcscoreBS

test[i,] <- tsls.wfit(cbind(1,datafBS$D,covs[draw,]),datafBS$Y,cbind(1,datafBS$Z,covs[draw,]),datafBS$w)$coefficients

aest[i,] <- tsls.wfit(cbind(1,datafBS$D,covs[draw,]),datafBS$Y,cbind(1,datafBS$Z,covs[draw,]),datafBS$w/cscoreBS)$coefficients

cat(i,"")
}

warnings()

tsls.wfit(cbind(1,D,covs),Y,cbind(1,Z,covs),w)$coefficients

tsls.wfit(cbind(1,D,covs),Y,cbind(1,Z,covs),w/cscore)$coefficients

colMeans(test)
apply(test,2,sd)
colMeans(aest)
apply(aest,2,sd)

mean(aest[,2] <= test[,2],na.rm=TRUE)

permclu <- (as.numeric(factor(ggn$cityturf)))

perm.test <- function(D,Z,Xmat,cscore,numperm=50,genoud=FALSE) {
	test.dist <- rep(NA,numperm)
	
	for (i in 1:numperm) {
				
		Xmats <- data.frame(Xmat[unlist(sapply(sort(unique(permclu)), function(x) if(sum(permclu==x) != 1) sample(c(1:N)[permclu == x],replace=FALSE) else c(1:N)[permclu==x])),])
		
		
		glamfitP <- glm(D~as.matrix(Xmats),weights=w,family=binomial(link="probit"),subset=Z==1)
		cscoreP <- predict.glm(glamfitP,type="response",newdata=Xmats)
		test.dist[i] <- sum((D - cscoreP*Z)^2)
		cat(i,"")
	}
	
	test.stat <- sum((D - cscore*Z)^2)
	p.value <- mean(test.dist <= test.stat)
	output <- list(test.dist,test.stat,p.value)
	names(output) <- c("test.dist","test.stat","p.value")
	return(output)
}


reorder <- unlist(sapply(sort(unique(permclu)), function(x) c(1:N)[permclu == x]))

permout <- perm.test(D[reorder],Z[reorder],covs,cscore[reorder],numperm=numiter)



save.image(file="ggnresults.RData")

load("ggnresults.RData")

#### cscore matrix

xtable(round(cor(cbind(cscore,covs))[,c(1,7:17)],digits=2))

#### cscore p-value

permout$p.value

#### Results

# 2SLS Estimate
tsls.wfit(cbind(1,D,covs),Y,cbind(1,Z,covs),w)$coefficients
# 2SLS Bootstrap CIs
apply(test,2,quantile,c(0.025,0.975))

# ICSW Estimate
tsls.wfit(cbind(1,D,covs),Y,cbind(1,Z,covs),w/cscore)$coefficients
# ICSW Bootstrap CIs
apply(aest,2,quantile,c(0.025,0.975))

# Bootstrap CIs
quantile(aest[,2]-test[,2],c(0.025,0.050,0.95,0.975))

round(cbind(tsls.wfit(cbind(1,D,covs),Y,cbind(1,Z,covs),w)$coefficients,tsls.wfit(cbind(1,D,covs),Y,cbind(1,Z,covs),w/cscore)$coefficients),digits=3)

round(cbind(apply(test,2,quantile,0.025),apply(test,2,quantile,0.975),apply(aest,2,quantile,0.025),apply(aest,2,quantile,0.975)),digits=3)

quartz(width=4,height=4)

par(mfrow = c(1, 1), pty = "s",family="Gill Sans MT",font.main=1,cex=.7)#,cex.lab=0.9,cex=1.1,cex.axis=0.9)

plot(density(permout$test.dist),xlim=c(permout$test.stat-1,max(permout$test.dist)+2),main="Green, Gerber and Nickerson (2003) Permutation Test",xlab="SSR")
lines(c(permout$test.stat,permout$test.stat),c(-100,100))
lines(c(-100,10000),c(0,0))
